使用go.Bar()函数可以绘制柱状图以及条形图,常用参数如下:
在Layout中对barmode参数进行设置,可以绘制出不同类型的柱状图:
# 2-1 Basic Bar Chart 基本柱状图
import plotly.graph_objects as go
animals = ['giraffes', 'orangutans', 'monkeys', 'elephant', 'zebra']
y1 = [10,15,20,18,12]
fig = go.Figure(data=go.Bar(x=animals,y=y1)) # 设置x/y轴数据
fig.show()
# 2-2 Styled Bar Chart 风格化柱状图
import plotly.graph_objects as go
animals = ['giraffes', 'orangutans', 'monkeys', 'elephant', 'zebra']
y1 = [10,15,20,18,12]
fig = go.Figure()
fig.add_trace(go.Bar(
x=animals, y=y1,
width=0.6, # 设置柱形宽度
opacity=0.8, # 设置不透明度
hoverinfo='x+y', # 设置鼠标指针交互显示文本
text=y1, # 设置显示文本
textfont=dict(
size=15,
color='rgba(0,0,0,0.5)',
family='Arial'), # 设置显示文本的字体、颜色、大小
textposition='inside', # 设置显示文本的位置
marker=dict(
color='#0097a7',
line=dict(color='#f9a825',width=1.5),
opacity=0.8), # 设置数据节点,包括颜色、线条、透明度等参数
))
fig.show()
数据节点参数marker是经常会使用到的参数,可以通过help(go.Bar.marker)来查看。关于marker的颜色设置:
Greys,YlGnBu,Greens,YlOrRd,Bluered,RdBu,Reds,Blues,Picnic,Rainbow,Portland,Jet,Hot,Blackbody, Earth,Electric,Viridis,Cividis# 思考:如何将柱形的颜色随y值的大小变化?——给color参数传递连续值
fig = go.Figure(data=go.Bar(
x=animals,y=y1,
marker=dict(
color=y1, # 将y1值传递给color参数(连续值)
colorscale=[[0, 'white'], [1, '#E15759']], # 设置颜色范围
showscale=True # 设置是否显示图例
)))
fig.show()
# 思考:如何将y值最大的柱形颜色和宽度设置成不同?——给color参数传递离散值
# 方法1. 可以通过numpy数组中argmax()来获取最大值索引
# 方法2. 可以使用pandas对象中idxmax()来获取最大值索引
# 自定义颜色列表,并修改其中一个值
import numpy as np
colors=['#607d8b',] * 5
colors[np.array(y1).argmax()] = '#e91e63'
# 自定义宽度列表,并修改其中一个值
width=[0.6] * 5
width[np.array(y1).argmax()]= 0.9
# 另一种写法
# import pandas as pd
# colors=['#607d8b',] * 5
# colors[pd.Series(y1).idxmax()] = '#e91e63'
# width=[0.6] * 5
# width[pd.Series(y1).idxmax()]=0.9
fig = go.Figure(data=go.Bar(
x=animals,y=y1,
marker=dict(color=colors), # 将自定义颜色列表传递给color参数
width=width # 将自定义宽度列表传递给width参数
))
fig.show()
# 2-3 Stacked Bar Chart 堆积柱状图
import plotly.graph_objects as go
animals=['giraffes', 'orangutans', 'monkeys', 'elephant', 'zebra']
y1 = [10,15,20,18,12]
y2 = [12,10,18,20,15]
fig = go.Figure()
fig.add_trace(go.Bar(x=animals, y=y1, name='SF Zoo'))
fig.add_trace(go.Bar(x=animals, y=y2, name='LA Zoo')) # 设置每个trace的名称
fig.update_layout(barmode='stack') # 更改barmode为stack
fig.show()
# 2-4 Grouped Bar Chart 柱状簇
import plotly.graph_objects as go
animals = ['giraffes', 'orangutans', 'monkeys', 'elephant', 'zebra']
y1 = [10,15,20,18,12]
y2 = [12,10,18,20,15]
fig = go.Figure()
fig.add_trace(go.Bar(x=animals, y=y1, name='SF Zoo'))
fig.add_trace(go.Bar(x=animals, y=y2, name='LA Zoo')) # 设置每个trace的名称
fig.update_layout(
barmode='group', # 更改barmode为group
bargap=0.2, # 设置bar之间的间距
bargroupgap=0.05, # 设置同一个group的bar之间的间距
)
fig.show()
# 2-5 Basic Horizontal Bar Chart
import plotly.graph_objects as go
animals=['giraffes', 'orangutans', 'monkeys', 'elephant', 'zebra']
y1=[10,15,20,18,12]
y2=[12,10,18,20,15]
fig = go.Figure()
fig.add_trace(go.Bar(y=animals, x=y1, name='SF Zoo', orientation='h'))
fig.add_trace(go.Bar(y=animals, x=y2, name='LA Zoo', orientation='h'))
# fig.update_traces(orientation='h') # 对于多个trace的相同设置可以通过.update_traces()方法来实现
fig.update_layout(barmode='stack')
fig.show()
导入数据文件'Sample - Superstore.xls',绘制水平柱状图,展示每个商品子类别(Sub-Category)的利润(Profit),并根据利润大小显示颜色。
# Step1. 导入数据文件'Sample - Superstore.xls',并查看数据
import pandas as pd
df = pd.read_excel('./Sample - Superstore.xls',sheet_name='Orders')
df.head()
| Row ID | Order ID | Order Date | Ship Date | Ship Mode | Customer ID | Customer Name | Segment | Country | City | ... | Postal Code | Region | Product ID | Category | Sub-Category | Product Name | Sales | Quantity | Discount | Profit | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | CA-2017-152156 | 2017-11-08 | 2017-11-11 | Second Class | CG-12520 | Claire Gute | Consumer | United States | Henderson | ... | 42420.0 | South | FUR-BO-10001798 | Furniture | Bookcases | Bush Somerset Collection Bookcase | 261.9600 | 2 | 0.00 | 41.9136 |
| 1 | 2 | CA-2017-152156 | 2017-11-08 | 2017-11-11 | Second Class | CG-12520 | Claire Gute | Consumer | United States | Henderson | ... | 42420.0 | South | FUR-CH-10000454 | Furniture | Chairs | Hon Deluxe Fabric Upholstered Stacking Chairs,... | 731.9400 | 3 | 0.00 | 219.5820 |
| 2 | 3 | CA-2017-138688 | 2017-06-12 | 2017-06-16 | Second Class | DV-13045 | Darrin Van Huff | Corporate | United States | Los Angeles | ... | 90036.0 | West | OFF-LA-10000240 | Office Supplies | Labels | Self-Adhesive Address Labels for Typewriters b... | 14.6200 | 2 | 0.00 | 6.8714 |
| 3 | 4 | US-2016-108966 | 2016-10-11 | 2016-10-18 | Standard Class | SO-20335 | Sean O'Donnell | Consumer | United States | Fort Lauderdale | ... | 33311.0 | South | FUR-TA-10000577 | Furniture | Tables | Bretford CR4500 Series Slim Rectangular Table | 957.5775 | 5 | 0.45 | -383.0310 |
| 4 | 5 | US-2016-108966 | 2016-10-11 | 2016-10-18 | Standard Class | SO-20335 | Sean O'Donnell | Consumer | United States | Fort Lauderdale | ... | 33311.0 | South | OFF-ST-10000760 | Office Supplies | Storage | Eldon Fold 'N Roll Cart System | 22.3680 | 2 | 0.20 | 2.5164 |
5 rows × 21 columns
# Step2. 数据分析:对Sub-Category进行分组,再对每组的Profit求和(数据聚合与分组操作——GroupBy机制)
data = df.groupby('Sub-Category')['Profit'].sum()
# df['Profit'].groupby(df['Sub-Category']).sum() # 另一种写法
print(data) # 返回Series:左侧是索引index,右侧是值values
print('\n',data.index) # 使用.index属性查看索引
print('\n',data.values) # 使用.values属性或者to_numpy()方法查看值(返回数组)
Sub-Category
Accessories 41936.6357
Appliances 18138.0054
Art 6527.7870
Binders 30221.7633
Bookcases -3472.5560
Chairs 26590.1663
Copiers 55617.8249
Envelopes 6964.1767
Fasteners 949.5182
Furnishings 13059.1436
Labels 5546.2540
Machines 3384.7569
Paper 34053.5693
Phones 44515.7306
Storage 21278.8264
Supplies -1189.0995
Tables -17725.4811
Name: Profit, dtype: float64
Index(['Accessories', 'Appliances', 'Art', 'Binders', 'Bookcases', 'Chairs',
'Copiers', 'Envelopes', 'Fasteners', 'Furnishings', 'Labels',
'Machines', 'Paper', 'Phones', 'Storage', 'Supplies', 'Tables'],
dtype='object', name='Sub-Category')
[ 41936.6357 18138.0054 6527.787 30221.7633 -3472.556 26590.1663
55617.8249 6964.1767 949.5182 13059.1436 5546.254 3384.7569
34053.5693 44515.7306 21278.8264 -1189.0995 -17725.4811]
# Step3. 绘制水平柱状图
import plotly.graph_objects as go
# 进阶:优化Layout,可以通过help(go.Layout)查看图形布局Layout的参数
fig.update_layout(
title='Profit by Sub-Category',
plot_bgcolor='white',
xaxis=dict(title='Profit',gridcolor='rgba(0,0,0,0.05)'),
yaxis=dict(title='Sub-Category')
)
fig.show()
# 思考:如何设置一个利润阈值?柱形的颜色由是否超过阈值来决定
fig.update_layout(
title='Profit by Sub-Category',
plot_bgcolor='white',
xaxis=dict(title='Profit',gridcolor='rgba(0,0,0,0.05)'),
yaxis=dict(title='Sub-Category'),
)
fig.show()
导入数据文件'Sample - Superstore.xls',使用堆积柱状图展示每个商品子类别(Sub-Category)中,不同细分客户(Segment)的利润(Profit)。
# Step1. 导入数据文件'Sample - Superstore.xls',并查看数据
import pandas as pd
df = pd.read_excel('./Sample - Superstore.xls',sheet_name='Orders')
df.shape, df.columns
((9994, 21),
Index(['Row ID', 'Order ID', 'Order Date', 'Ship Date', 'Ship Mode',
'Customer ID', 'Customer Name', 'Segment', 'Country', 'City', 'State',
'Postal Code', 'Region', 'Product ID', 'Category', 'Sub-Category',
'Product Name', 'Sales', 'Quantity', 'Discount', 'Profit'],
dtype='object'))
# Step2. 数据分析:从绘图角度来思考,一个图形fig中有三个trace(Segment),每个trace对应多个柱形(Sub-Category)
# 对Segment和Sub-Category两个指标进行分组,再对每组的Profit求和(数据聚合与分组操作——GroupBy机制)
data = df.groupby(['Segment','Sub-Category'])['Profit'].sum()
# df['Profit'].groupby([df['Segment'],df['Sub-Category']]).sum() # 另一种写法
data
Segment Sub-Category
Consumer Accessories 20735.9225
Appliances 6981.9282
Art 3454.3011
Binders 17995.5972
Bookcases -4435.6382
Chairs 13235.3319
Copiers 24083.7106
Envelopes 3264.4126
Fasteners 576.8008
Furnishings 7919.4227
Labels 3075.9884
Machines 2141.0618
Paper 15534.6436
Phones 23837.1147
Storage 7104.2004
Supplies -1657.5513
Tables -9728.0378
Corporate Accessories 12707.4805
Appliances 7429.8952
Art 2004.6477
Binders 6377.3201
Bookcases 638.4502
Chairs 8344.6565
Copiers 18990.2789
Envelopes 2571.2290
Fasteners 251.9030
Furnishings 3508.2077
Labels 1760.8273
Machines 703.0190
Paper 10361.5468
Phones 11766.2196
Storage 9131.0247
Supplies 338.9264
Tables -4906.4986
Home Office Accessories 8493.2327
Appliances 3726.1820
Art 1068.8382
Binders 5848.8460
Bookcases 324.6320
Chairs 5010.1779
Copiers 12543.8354
Envelopes 1128.5351
Fasteners 120.8144
Furnishings 1631.5132
Labels 709.4383
Machines 540.6761
Paper 8157.3789
Phones 8912.3963
Storage 5043.6013
Supplies 129.5254
Tables -3090.9447
Name: Profit, dtype: float64
# 得到的结果是一个MultiIndex(多重索引)的Series:左侧是两层索引index,右侧是值values
# data.index # 使用index属性查看多重索引
print(data.index.levels[0]) # 查看外层索引
print(data.index.levels[1]) # 查看内层索引
print(data['Consumer']) # 指定任一外层索引,查看内层Series
Index(['Consumer', 'Corporate', 'Home Office'], dtype='object', name='Segment')
Index(['Accessories', 'Appliances', 'Art', 'Binders', 'Bookcases', 'Chairs',
'Copiers', 'Envelopes', 'Fasteners', 'Furnishings', 'Labels',
'Machines', 'Paper', 'Phones', 'Storage', 'Supplies', 'Tables'],
dtype='object', name='Sub-Category')
Sub-Category
Accessories 20735.9225
Appliances 6981.9282
Art 3454.3011
Binders 17995.5972
Bookcases -4435.6382
Chairs 13235.3319
Copiers 24083.7106
Envelopes 3264.4126
Fasteners 576.8008
Furnishings 7919.4227
Labels 3075.9884
Machines 2141.0618
Paper 15534.6436
Phones 23837.1147
Storage 7104.2004
Supplies -1657.5513
Tables -9728.0378
Name: Profit, dtype: float64
# Step3. 绘制堆积柱状图
import plotly.graph_objects as go
fig=go.Figure()
fig.update(layout=dict(
title='Profit by Sub-Category & Segment',
yaxis_title='Profit',
barmode='stack',
))
fig.show()
# 思考:如何自定义颜色?
fig=go.Figure()
colors=['#76B7B2','#BAB0AC','#FF9DA7'] # 自定义颜色列表(离散值)
fig.update(layout=dict(
title='Profit by Sub-Category & Segment',
barmode='stack',
))
fig.show()